Chris Bail
Duke University
website: https://www.chrisbail.net
Twitter: https://www.twitter.com/chris_bail
github: https://github.com/cbail
# Text-mining and data-manipulation packages
library(tidytext)
library(dplyr)

# Pull the Trump tweet corpus into the session (creates `trumptweets`)
load(url("https://cbail.github.io/Trump_Tweets.Rdata"))

# Tidy-text format: one row per word per tweet, keeping the timestamp
tidy_trump_tweets <- trumptweets %>%
  select(created_at, text) %>%
  unnest_tokens("word", text)
# Load the tidytext stop-word lexicon
data("stop_words")

# Twitter-specific noise tokens that survive stop-word removal
noise_words <- c("https", "rt", "t.co", "amp")

# Word frequency table, most frequent first.
# `%in%` replaces the chained == / | comparisons, and
# count(sort = TRUE) replaces count() + arrange(desc(n)).
top_words <-
  tidy_trump_tweets %>%
  anti_join(stop_words, by = "word") %>%  # explicit key silences the join message
  filter(!word %in% noise_words) %>%
  count(word, sort = TRUE)
library(ggplot2)

# Bar chart of the 20 most frequent words, ordered by frequency.
# geom_col() is the idiomatic form of geom_bar(stat = "identity"),
# and the three theme() calls are merged into one.
# legend.position = "none" suppresses the decorative fill legend
# (portable replacement for the deprecated guides(fill = FALSE)).
top_words %>%
  slice(1:20) %>%
  ggplot(aes(x = reorder(word, -n), y = n, fill = word)) +
  geom_col() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1, size = 13),
    plot.title = element_text(hjust = 0.5, size = 18),
    legend.position = "none"
  ) +
  ylab("Frequency") +
  xlab("") +
  ggtitle("Most Frequent Words in Trump Tweets")
# TF-IDF with each tweet (keyed by its timestamp) treated as a "document":
# tokenize, drop stop words, count word-per-tweet, then score.
tidy_trump_tfidf <- trumptweets %>%
  select(created_at, text) %>%
  unnest_tokens("word", text) %>%
  anti_join(stop_words) %>%
  count(word, created_at) %>%
  bind_tf_idf(word, created_at, n)

# Rank terms from most to least distinctive
top_tfidf <- tidy_trump_tfidf %>%
  arrange(desc(tf_idf))

# Inspect the single highest-scoring (most tweet-specific) word
top_tfidf$word[1]
[1] "standforouranthem"
# Simple keyword dictionary for economy-related tweets
economic_dictionary <- c("economy", "unemployment", "trade", "tariffs")

library(stringr)

# BUG FIX: str_detect() takes ONE pattern, not a vector of patterns.
# Passing the length-4 vector recycles it against the tweet vector
# (an error under tidyverse recycling rules; silently wrong rows under
# older recycling). Collapse the terms into a single alternation regex
# so every tweet is checked against every term.
economic_tweets <-
  trumptweets %>%
  filter(str_detect(text, paste(economic_dictionary, collapse = "|")))

# Inspect the first three matching tweets
head(economic_tweets$text, 3)
[1] "Great talk with my friend President Mauricio Macri of Argentina this week. He is doing such a good job for Argentina. I support his vision for transforming his country’s economy and unleashing its potential!"
[2] "The Washington Post and CNN have typically written false stories about our trade negotiations with China. Nothing has happened with ZTE except as it pertains to the larger trade deal. Our country has been losing hundreds of billions of dollars a year with China..."
[3] "China and the United States are working well together on trade, but past negotiations have been so one sided in favor of China, for so many years, that it is hard for them to make a deal that benefits both countries. But be cool, it will all work out!"
# Peek at the Bing sentiment lexicon: one row per word, labeled positive/negative
head(get_sentiments("bing"))
# A tibble: 6 x 2
word sentiment
<chr> <chr>
1 2-faced negative
2 2-faces negative
3 a+ positive
4 abnormal negative
5 abolish negative
6 abominable negative
# Per-tweet counts of positive and negative words (Bing lexicon).
# inner_join keeps only words present in the lexicon; the explicit
# by = "word" documents the key and silences the implicit-join message.
trump_tweet_sentiment <-
  tidy_trump_tweets %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  count(created_at, sentiment)

# Inspect the first few tweet/sentiment counts
head(trump_tweet_sentiment)
# A tibble: 6 x 3
created_at sentiment n
<dttm> <chr> <int>
1 2017-02-05 22:49:42 positive 2
2 2017-02-06 03:36:54 positive 4
3 2017-02-06 12:01:53 negative 3
4 2017-02-06 12:01:53 positive 1
5 2017-02-06 12:07:55 negative 2
6 2017-02-06 16:32:24 negative 3
# Collapse timestamps to calendar dates for daily aggregation.
# `created_at` is POSIXct (printed as <dttm> above), so as.Date() needs no
# format string; the one previously supplied ("%Y-%m-%d %x") was ignored for
# POSIXct input — and `%x` is locale-dependent, so it was wrong regardless.
tidy_trump_tweets$date <- as.Date(tidy_trump_tweets$created_at)
# Daily count of negative-sentiment words (Bing lexicon)
trump_sentiment_plot <-
  tidy_trump_tweets %>%
  inner_join(get_sentiments("bing")) %>%
  filter(sentiment == "negative") %>%
  count(date, sentiment)

library(ggplot2)

# Time series of negative word counts per day; themes merged into one call
ggplot(trump_sentiment_plot, aes(x = date, y = n)) +
  geom_line(color = "red", size = .5) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1, size = 13),
    plot.title = element_text(hjust = 0.5, size = 18),
    aspect.ratio = 1 / 4
  ) +
  ylab("Number of Negative Words") +
  xlab("") +
  ggtitle("Negative Sentiment in Trump Tweets")
From Gonçalves et al. (2013)
From Ribeiro et al. (2016)
From Gonçalves et al. (2013)